# !unzip NYPD_Complaint_Data_Histroic_2020_2022.csv.zip
Archive: NYPD_Complaint_Data_Histroic_2020_2022.csv.zip inflating: NYPD_Complaint_Data_Histroic_2020_2022.csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv("NYPD_Complaint_Data_Histroic_2020_2022.csv")
df
| CMPLNT_NUM | CMPLNT_FR_DT | CMPLNT_FR_TM | CMPLNT_TO_DT | CMPLNT_TO_TM | ADDR_PCT_CD | RPT_DT | KY_CD | OFNS_DESC | PD_CD | ... | SUSP_SEX | TRANSIT_DISTRICT | Latitude | Longitude | Lat_Lon | PATROL_BORO | STATION_NAME | VIC_AGE_GROUP | VIC_RACE | VIC_SEX | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 210240572 | 02/28/2020 | 15:15:00 | NaN | (null) | 106.0 | 02/28/2020 | 344 | ASSAULT 3 & RELATED OFFENSES | 101.0 | ... | M | NaN | 40.682398 | -73.840072 | (40.682398279333, -73.8400721577516) | PATROL BORO QUEENS SOUTH | (null) | <18 | BLACK | M |
| 1 | 210881300 | 03/10/2020 | 21:30:00 | NaN | (null) | 67.0 | 03/11/2020 | 578 | HARRASSMENT 2 | 638.0 | ... | M | NaN | 40.648851 | -73.951017 | (40.6488507469884, -73.951016510623) | PATROL BORO BKLYN SOUTH | (null) | 45-64 | BLACK | M |
| 2 | 213858183 | 05/28/2020 | 08:15:00 | NaN | (null) | 43.0 | 05/28/2020 | 107 | BURGLARY | 231.0 | ... | (null) | NaN | 40.822912 | -73.870041 | (40.8229123084767, -73.8700413043181) | PATROL BORO BRONX | (null) | UNKNOWN | UNKNOWN | D |
| 3 | 213831383 | 05/31/2020 | 13:45:00 | NaN | (null) | 47.0 | 05/31/2020 | 578 | HARRASSMENT 2 | 638.0 | ... | U | NaN | 40.887314 | -73.847272 | (40.8873136344706, -73.8472717577564) | PATROL BORO BRONX | (null) | 25-44 | BLACK | M |
| 4 | 213756171 | 06/01/2020 | 16:00:00 | NaN | (null) | 52.0 | 06/01/2020 | 578 | HARRASSMENT 2 | 638.0 | ... | (null) | NaN | 40.869470 | -73.879861 | (40.8694704770483, -73.8798608037303) | PATROL BORO BRONX | (null) | 25-44 | BLACK | F |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1395506 | 261171983 | 12/30/2022 | 12:31:00 | NaN | (null) | 106.0 | 12/31/2022 | 341 | PETIT LARCENY | 349.0 | ... | U | NaN | 40.675259 | -73.854501 | (40.67525903, -73.85450113) | PATROL BORO QUEENS SOUTH | (null) | 25-44 | ASIAN / PACIFIC ISLANDER | F |
| 1395507 | 261175492 | 12/31/2022 | 11:56:00 | 12/31/2022 | 17:08:00 | 44.0 | 12/31/2022 | 359 | OFFENSES AGAINST PUBLIC ADMINI | 759.0 | ... | F | NaN | 40.829632 | -73.915209 | (40.829632, -73.915209) | PATROL BORO BRONX | (null) | 65+ | BLACK HISPANIC | M |
| 1395508 | 261147482 | 12/30/2022 | 17:20:00 | 12/30/2022 | 17:28:00 | 17.0 | 12/30/2022 | 344 | ASSAULT 3 & RELATED OFFENSES | 101.0 | ... | M | NaN | 40.761265 | -73.968887 | (40.761265, -73.968887) | PATROL BORO MAN SOUTH | (null) | 25-44 | BLACK | M |
| 1395509 | 261179651 | 12/31/2022 | 20:20:00 | 12/31/2022 | 20:26:00 | 112.0 | 12/31/2022 | 348 | VEHICLE AND TRAFFIC LAWS | 916.0 | ... | (null) | NaN | 40.725681 | -73.857270 | (40.72568117, -73.8572701) | PATROL BORO QUEENS NORTH | (null) | 18-24 | WHITE | F |
| 1395510 | 261157928 | 12/31/2022 | 04:00:00 | NaN | (null) | 52.0 | 12/31/2022 | 106 | FELONY ASSAULT | 109.0 | ... | M | NaN | 40.870942 | -73.891499 | (40.870942, -73.891499) | PATROL BORO BRONX | (null) | 25-44 | BLACK | M |
1395511 rows × 35 columns
NYPD_df = df[['CMPLNT_FR_TM','RPT_DT','OFNS_DESC','LAW_CAT_CD','VIC_AGE_GROUP','VIC_RACE','VIC_SEX']]
NYPD_df
| CMPLNT_FR_TM | RPT_DT | OFNS_DESC | LAW_CAT_CD | VIC_AGE_GROUP | VIC_RACE | VIC_SEX | |
|---|---|---|---|---|---|---|---|
| 0 | 15:15:00 | 02/28/2020 | ASSAULT 3 & RELATED OFFENSES | MISDEMEANOR | <18 | BLACK | M |
| 1 | 21:30:00 | 03/11/2020 | HARRASSMENT 2 | VIOLATION | 45-64 | BLACK | M |
| 2 | 08:15:00 | 05/28/2020 | BURGLARY | FELONY | UNKNOWN | UNKNOWN | D |
| 3 | 13:45:00 | 05/31/2020 | HARRASSMENT 2 | VIOLATION | 25-44 | BLACK | M |
| 4 | 16:00:00 | 06/01/2020 | HARRASSMENT 2 | VIOLATION | 25-44 | BLACK | F |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1395506 | 12:31:00 | 12/31/2022 | PETIT LARCENY | MISDEMEANOR | 25-44 | ASIAN / PACIFIC ISLANDER | F |
| 1395507 | 11:56:00 | 12/31/2022 | OFFENSES AGAINST PUBLIC ADMINI | MISDEMEANOR | 65+ | BLACK HISPANIC | M |
| 1395508 | 17:20:00 | 12/30/2022 | ASSAULT 3 & RELATED OFFENSES | MISDEMEANOR | 25-44 | BLACK | M |
| 1395509 | 20:20:00 | 12/31/2022 | VEHICLE AND TRAFFIC LAWS | MISDEMEANOR | 18-24 | WHITE | F |
| 1395510 | 04:00:00 | 12/31/2022 | FELONY ASSAULT | FELONY | 25-44 | BLACK | M |
1395511 rows × 7 columns
crime_mapping = {
'HARRASSMENT 2': 'HARASSMENT',
'ESCAPE 3': 'ESCAPE',
'ASSAULT 3 & RELATED OFFENSES': 'ASSAULT & RELATED OFFENSES',
'CRIMINAL MISCHIEF & RELATED OF': 'CRIMINAL MISCHIEF',
'OFF. AGNST PUB ORD SENSBLTY &': 'OFFENSES AGAINST PUBLIC ORDER/ADMINISTRATION',
'OTHER STATE LAWS (NON PENAL LA': 'OTHER STATE LAWS (NON PENAL LAW)',
'ENDAN WELFARE INCOMP': 'ENDANGERING WELFARE OF INCOMPETENT',
'AGRICULTURE & MRKTS LAW-UNCLASSIFIED': 'AGRICULTURE & MARKETS LAW',
'DISRUPTION OF A RELIGIOUS SERV': 'DISRUPTION OF A RELIGIOUS SERVICE',
'LOITERING/GAMBLING (CARDS, DIC': 'GAMBLING',
'OFFENSES AGAINST MARRIAGE UNCL': 'OFFENSES AGAINST MARRIAGE',
'HOMICIDE-NEGLIGENT,UNCLASSIFIE': 'HOMICIDE-NEGLIGENT',
'E': 'UNKNOWN',
'D': 'BUSINESS/ORGANIZATION',
'F': 'FEMALE',
'M': 'MALE'
}
def get_map(x):
if x not in crime_mapping : return x
return crime_mapping.get(x, x)
NYPD_df["OFNS_DESC"] = NYPD_df["OFNS_DESC"].map(get_map)
print(NYPD_df["OFNS_DESC"].value_counts())
PETIT LARCENY 284870
HARASSMENT 224636
ASSAULT & RELATED OFFENSES 146673
CRIMINAL MISCHIEF 136687
GRAND LARCENY 128647
...
NEW YORK CITY HEALTH CODE 13
INTOXICATED/IMPAIRED DRIVING 8
DISRUPTION OF A RELIGIOUS SERVICE 4
FORTUNE TELLING 1
OFFENSES AGAINST MARRIAGE 1
Name: OFNS_DESC, Length: 63, dtype: int64
<ipython-input-66-1808c16af01e>:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
sex_mapping = {
'F': 'FEMALE',
'M': 'MALE'
}
def sex_map(x):
if x not in sex_mapping : return "UNKNOWN"
return sex_mapping.get(x, x)
NYPD_df["VIC_SEX"] = NYPD_df["VIC_SEX"].map(sex_map)
print(NYPD_df["VIC_SEX"].value_counts())
FEMALE 550097 MALE 495695 UNKNOWN 349719 Name: VIC_SEX, dtype: int64
<ipython-input-67-f1aca97aca78>:10: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
NYPD_df
| CMPLNT_FR_TM | RPT_DT | OFNS_DESC | LAW_CAT_CD | VIC_AGE_GROUP | VIC_RACE | VIC_SEX | |
|---|---|---|---|---|---|---|---|
| 0 | 15:15:00 | 02/28/2020 | ASSAULT & RELATED OFFENSES | MISDEMEANOR | <18 | BLACK | MALE |
| 1 | 21:30:00 | 03/11/2020 | HARASSMENT | VIOLATION | 45-64 | BLACK | MALE |
| 2 | 08:15:00 | 05/28/2020 | BURGLARY | FELONY | UNKNOWN | UNKNOWN | UNKNOWN |
| 3 | 13:45:00 | 05/31/2020 | HARASSMENT | VIOLATION | 25-44 | BLACK | MALE |
| 4 | 16:00:00 | 06/01/2020 | HARASSMENT | VIOLATION | 25-44 | BLACK | FEMALE |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1395506 | 12:31:00 | 12/31/2022 | PETIT LARCENY | MISDEMEANOR | 25-44 | ASIAN / PACIFIC ISLANDER | FEMALE |
| 1395507 | 11:56:00 | 12/31/2022 | OFFENSES AGAINST PUBLIC ADMINI | MISDEMEANOR | 65+ | BLACK HISPANIC | MALE |
| 1395508 | 17:20:00 | 12/30/2022 | ASSAULT & RELATED OFFENSES | MISDEMEANOR | 25-44 | BLACK | MALE |
| 1395509 | 20:20:00 | 12/31/2022 | VEHICLE AND TRAFFIC LAWS | MISDEMEANOR | 18-24 | WHITE | FEMALE |
| 1395510 | 04:00:00 | 12/31/2022 | FELONY ASSAULT | FELONY | 25-44 | BLACK | MALE |
1395511 rows × 7 columns
NYPD_df.isna().sum()
CMPLNT_FR_TM 0 RPT_DT 0 OFNS_DESC 0 LAW_CAT_CD 0 VIC_AGE_GROUP 0 VIC_RACE 0 VIC_SEX 0 dtype: int64
NYPD_df['OFNS_DESC'].value_counts()
PETIT LARCENY 284870
HARASSMENT 224636
ASSAULT & RELATED OFFENSES 146673
CRIMINAL MISCHIEF 136687
GRAND LARCENY 128647
...
NEW YORK CITY HEALTH CODE 13
INTOXICATED/IMPAIRED DRIVING 8
DISRUPTION OF A RELIGIOUS SERVICE 4
FORTUNE TELLING 1
OFFENSES AGAINST MARRIAGE 1
Name: OFNS_DESC, Length: 63, dtype: int64
- Type of Crimes
- Analyzing a Specific Crime
import plotly.express as px
fig = px.histogram(NYPD_df, x='OFNS_DESC', color='VIC_SEX')
fig.show()